require(rgdal)
require(maptools)
require(rgeos)
require(plyr)
require(xlsx)
require(car)

italy_box_y <- c(36, 47.5)
italy_box_x <- c(6.1, 19.5) 

# Functions
sqLiteConnect <- function(database, table) {
  require(DBI)
  con <- dbConnect(RSQLite::SQLite(), dbname = database)
  query <- dbSendQuery(con, paste("SELECT * FROM ", table, ";", sep="")) 
  result <- fetch(query, n = -1)
  dbClearResult(query)
  dbDisconnect(con)
  return(result)
}


# Regionali 2010
require(stringr)
electionday2010 <- as.Date('2010-03-28')
election2010_m5s_provincia <- data.frame()

regions <- c("Campania", "Lombardia", "Piemonte", "Veneto", "Emilia-Romagna")
# DOWNLOAD from http://elezionistorico.interno.it/
base_dir <- 'elezioni2010-2012/regionali2010/' # NOT PROVIDED
for (r in regions) {
  print(r)
  filename <- paste0("LISTE-Regionali-28_03_2010-ITALIA-", toupper(r), "-Provincia.csv")
  liste <- read.csv(paste0(base_dir,filename), skip=1, sep = ";", row.names = NULL)
  liste <-
    subset(liste, `Liste.e.Coalizioni` == 'MOVIMENTO 5 STELLE BEPPEGRILLO.IT', 
           select = c('Ente', 'Voti.lista'))
  liste$Ente <- str_trim(liste$Ente)
  
  filename <- paste0("SCRUTINI-Regionali-28_03_2010-ITALIA-", toupper(r), "-Provincia.csv")
  scrutini <- read.csv(paste0(base_dir,filename), skip=1, sep = ";", row.names = NULL)
  names(scrutini) <- c(names(scrutini)[2:ncol(scrutini)], NA)
  scrutini[,ncol(scrutini)] <- NULL
  scrutini$Ente <- str_trim(scrutini$Ente)
  
  tmp_df <- merge(liste, scrutini)
  tmp_df$regione <- r
  election2010_m5s_provincia <- rbind(election2010_m5s_provincia, tmp_df)
}

meetup_user <-  sqLiteConnect("m5s_meetup_jun2015.sqlite", "member")
meetup_user <- subset(meetup_user, joined < electionday2010)

meetup_user <- meetup_user[meetup_user$lat >=  italy_box_y[1] & meetup_user$lat <= italy_box_y[2], ]
meetup_user <- meetup_user[meetup_user$lon >=  italy_box_x[1] & meetup_user$lon <=  italy_box_x[2], ]

## Count participation by comune
xy <- cbind(meetup_user$lon, meetup_user$lat)
sp_points <-  SpatialPoints(xy)
proj4string(sp_points) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"

# DOWNLOAD from http://www.istat.it
italy_map_2010 <-  
  readOGR(dsn = "ISTAT/Prov2010_WGS84_g", layer = "Prov2010_WGS84_g", verbose=FALSE) # Not provided
italy_map_2010@data$id = rownames(italy_map_2010@data)
gpclibPermit() #avoid "Error: isTRUE(gpclibPermitStatus()) is not TRUE" in the next command
italy_map_2010.points = fortify(italy_map_2010,region="id")
italy_map_2010.df = join(italy_map_2010.points, italy_map_2010@data, by="id")

## Transform
utmStr <- "+proj=utm +zone=%d +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
crs <- CRS(sprintf(utmStr, 32))
italy_map_2010 <- spTransform(italy_map_2010, crs)
sp_points <- spTransform(sp_points, crs)

# ### Set up container for results
# n <- length(sp_points)
# nearestProvincia <- character(n)
# for (i in seq_along(nearestProvincia)) {
#   print(n - i)
#   nearestProvincia[i] <- 
#     as.character(italy_map_2010$PROVINCIA)[which.min(gDistance(sp_points[i,], italy_map_2010, byid=TRUE))]
# }
# meetup_user$nearest_provincia <- nearestProvincia
# save(meetup_user, file = '~/Desktop/r_work_directory/meetup_users_provincia_2010.RData')
users_by_provincia <- as.data.frame(table(meetup_user$nearest_provincia))

election2010_m5s_provincia <-
  merge(election2010_m5s_provincia, users_by_provincia, all.x = TRUE,
        by.x = "Ente", by.y = 'Var1')

names(election2010_m5s_provincia)[2:4] <- c('votes','electors','voters')
names(election2010_m5s_provincia)[8] <- 'members'

election2010_m5s_provincia$votes_ratio <- 
  with(election2010_m5s_provincia, votes / voters)
election2010_m5s_provincia$members_ratio <- 
  with(election2010_m5s_provincia, members / voters)

italy_map_2010 <-
  merge(italy_map_2010, election2010_m5s_provincia[,c('Ente','votes','electors','voters','members')],
        by.x = 'PROVINCIA', by.y = 'Ente')

## MODEL 
mod2010 <- lm(log(votes) ~ log(voters) + log(members), 
              data = election2010_m5s_provincia)
mod2010_b <- lm(votes_ratio ~ 
                  log(voters) + members_ratio, 
                data = election2010_m5s_provincia)
scatterplot(election2010_m5s_provincia$votes_ratio, 
            election2010_m5s_provincia$members_ratio)


# Politiche 2013
electionday2013 <- as.Date('2013-02-24')
load("minint_election_data_2013-2014.RData")

# Load Voti M5S
voti_m5s <- subset(par_it_2013_liste, 
                   DESCRLISTA == "MOVIMENTO 5 STELLE BEPPEGRILLO.IT" &
                     CODTIPOELEZIONE == "C")
names(par_it_2013_scrutini)[4] <- "CODICECOMUNE"
voti_m5s <- merge(voti_m5s, 
                  subset(par_it_2013_scrutini,
                         CODTIPOELEZIONE == "C",
                         select = c("CODICECOMUNE", "NUMELETTORI", "NUMVOTANTITOTALI","SKNONVALIDE")))
voti_m5s$VOTIVALIDI <- with(voti_m5s, NUMVOTANTITOTALI - SKNONVALIDE)
voti_m5s$votem5s_perc <- with(voti_m5s, VOTILISTA / VOTIVALIDI)
votem5s_nat_perc <- with(voti_m5s, sum(VOTILISTA) / sum(VOTIVALIDI))
voti_m5s$CODICECOMUNE <- as.factor(voti_m5s$CODICECOMUNE)
voti_m5s$votem5s_exp <- voti_m5s$VOTIVALIDI * votem5s_nat_perc
voti_m5s$votem5s_sr <- with(voti_m5s, VOTILISTA / votem5s_exp)
load("minint_istat_junct_tables.RData")
voti_m5s <- merge(voti_m5s, 
                  minint_istat_2013_junct_table[,c('cod_comune_minint','cod_comune_istat')], 
                  by.x = "CODICECOMUNE", by.y = "cod_comune_minint") 


meetup_user <-  sqLiteConnect("m5s_meetup_jun2015.sqlite", "member")
meetup_user <- subset(meetup_user, joined < electionday2013)

meetup_user <- meetup_user[meetup_user$lat >=  italy_box_y[1] & meetup_user$lat <= italy_box_y[2], ]
meetup_user <- meetup_user[meetup_user$lon >=  italy_box_x[1] & meetup_user$lon <=  italy_box_x[2], ]

## Load Provincie
italy_map_2013 <- 
  readOGR(dsn = "/ISTAT/Prov_2013_WGS84_g", layer = "Prov2013_WGS84_g", verbose=FALSE) # Not provided

italy_map_comuni_2013 <-
  as.data.frame(readOGR(dsn = "/ISTAT/Com2013_WGS84_g", layer = "Com2013_WGS84_g", verbose=FALSE)) # Not provided

prov_comuni_join <- italy_map_comuni_2013[,c('COD_PRO','PRO_COM')]
voti_m5s <- merge(voti_m5s, prov_comuni_join, by.x = 'cod_comune_istat', by.y = 'PRO_COM', all.x = TRUE)
voti_m5s$COD_PRO[voti_m5s$DESCRCOMUNE=='QUERO'] <- 25
voti_m5s$COD_PRO[voti_m5s$DESCRCOMUNE=='VAS'] <- 25
voti_m5s$COD_PRO[voti_m5s$DESCRCOMUNE=='MONTORO INFERIORE'] <- 64
voti_m5s$COD_PRO[voti_m5s$DESCRCOMUNE=='MONTORO SUPERIORE'] <- 64

require(dplyr)
voti_m5s_by_provincia <-
  voti_m5s %>%
  dplyr::group_by(COD_PRO) %>%
  dplyr::summarize(voti = sum(VOTILISTA),
                   elettori = sum(NUMELETTORI),
                   votanti = sum(NUMVOTANTITOTALI))
voti_m5s_by_provincia <-
  merge(voti_m5s_by_provincia, 
        as.data.frame(italy_map_2013[,c('COD_PRO','PROVINCIA')]),
        all.x = TRUE,
        by = 'COD_PRO')
  
## Count participation by comune
xy <- cbind(meetup_user$lon, meetup_user$lat)
sp_points <-  SpatialPoints(xy)
proj4string(sp_points) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"

## Transform
utmStr <- "+proj=utm +zone=%d +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
crs <- CRS(sprintf(utmStr, 32))
italy_map_2013 <- spTransform(italy_map_2013, crs)
sp_points <- spTransform(sp_points, crs)

res <- over(sp_points, italy_map_2013)
meetup_user$provincia <- res$PROVINCIA

# Out-at-sea
meetup_user_out_at_sea <- subset(meetup_user, is.na(provincia))
xy <- cbind(meetup_user_out_at_sea$lon, meetup_user_out_at_sea$lat)
sp_points <-  SpatialPoints(xy)
proj4string(sp_points) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
sp_points <- spTransform(sp_points, crs)

# ### Set up container for results
n <- length(sp_points)
nearestProvincia <- character(n)
for (i in seq_along(nearestProvincia)) {
  print(n - i)
  nearestProvincia[i] <-
    as.character(italy_map_2013$PROVINCIA)[which.min(gDistance(sp_points[i,], italy_map_2013, byid=TRUE))]
}
meetup_user_out_at_sea$provincia <- nearestProvincia

meetup_user <- rbind(subset(meetup_user, !is.na(provincia)),
                     meetup_user_out_at_sea)
users_by_provincia <- as.data.frame(table(meetup_user$provincia))

voti_m5s_by_provincia <-
  merge(voti_m5s_by_provincia, users_by_provincia, all.x = TRUE,
        by.x = 'PROVINCIA', by.y = 'Var1')

names(voti_m5s_by_provincia)[3:6] <- c('votes', 'electors', 'voters', 'members') 

election2013_m5s_provincia <- voti_m5s_by_provincia

election2013_m5s_provincia$votes_ratio <- 
  with(election2013_m5s_provincia, votes / voters)
election2013_m5s_provincia$members_ratio <- 
  with(election2013_m5s_provincia, members / voters)

italy_map_2013 <- 
  merge(italy_map_2013, election2013_m5s_provincia[,c('COD_PRO','votes','electors','voters','members')], 
        by = 'COD_PRO')

## MODEL 
mod2013 <- lm(log(votes) ~ log(voters) + log(members), 
                data = election2013_m5s_provincia)
mod2013_b <- lm(votes_ratio ~ 
                  log(voters) + members_ratio, 
                data = election2013_m5s_provincia)
scatterplot(election2013_m5s_provincia$votes_ratio, 
            election2013_m5s_provincia$members_ratio)

# Europee 2014
electionday2014 <- as.Date('2014-05-25')
load("minint_election_data_2013-2014.RData")
italy_map_2014 <- 
  readOGR(dsn = "/Users/francesco/Desktop/GIS_Data/Administrative units/Italy/ISTAT/Prov2014_WGS84_g", layer = "Prov2014_WGS84_g", verbose=FALSE) # not provided

meetup_user <-  sqLiteConnect("m5s_meetup_jun2015.sqlite", "member")
meetup_user <- subset(meetup_user, joined < electionday2014)
meetup_user <- meetup_user[meetup_user$lat >=  italy_box_y[1] & meetup_user$lat <= italy_box_y[2], ]
meetup_user <- meetup_user[meetup_user$lon >=  italy_box_x[1] & meetup_user$lon <=  italy_box_x[2], ]


# Load Voti M5S
voti_m5s <- subset(par_eu_2014_liste, 
                   DESCCP == "MOVIMENTO 5 STELLE BEPPEGRILLO.IT" &
                     CODTIPOELEZIONE == "E")
voti_m5s <- merge(voti_m5s, 
                  subset(par_eu_2014_scrutini,
                         CODTIPOELEZIONE == "E",
                         select = c("NUMENTE", "NUMELETTORI", "NUMVOTANTITOTALI","SKNONVALIDE")),
                  by = "NUMENTE")
voti_m5s$VOTIVALIDI <- with(voti_m5s, NUMVOTANTITOTALI - SKNONVALIDE)
voti_m5s$votem5s_perc <- with(voti_m5s, TOTVOTI / VOTIVALIDI)
votem5s_nat_perc <- with(voti_m5s, sum(TOTVOTI) / sum(VOTIVALIDI))
voti_m5s$NUMENTE <- as.factor(voti_m5s$NUMENTE)
voti_m5s$votem5s_exp <- voti_m5s$VOTIVALIDI * votem5s_nat_perc
voti_m5s$votem5s_sr <- with(voti_m5s, TOTVOTI / votem5s_exp)
load("~/ownCloud/dati_elezioni_italiane/data/minint_istat_junct_tables.RData")
voti_m5s <- merge(voti_m5s, 
                  minint_istat_2014_junct_table[,c('cod_comune_minint','cod_comune_istat')], 
                  by.x = "NUMENTE", by.y = "cod_comune_minint") 

italy_map_comuni_2014 <-
  as.data.frame(readOGR(dsn = "/ISTAT/Com_2014_WGS84_g", layer = "Com2014_WGS84_g", verbose=FALSE)) # not provided

prov_comuni_join <- italy_map_comuni_2014[,c('COD_PRO','PRO_COM')]
voti_m5s <- merge(voti_m5s, prov_comuni_join, by.x = 'cod_comune_istat', by.y = 'PRO_COM', all.x = TRUE)

require(dplyr)
voti_m5s_by_provincia <-
  voti_m5s %>%
  dplyr::group_by(COD_PRO) %>%
  dplyr::summarize(voti = sum(TOTVOTI),
                   elettori = sum(NUMELETTORI),
                   votanti = sum(NUMVOTANTITOTALI))
voti_m5s_by_provincia <-
  merge(voti_m5s_by_provincia, 
        as.data.frame(italy_map_2014[,c('COD_PRO','PROVINCIA')]),
        all.x = TRUE,
        by = 'COD_PRO')

## Count participation by comune
xy <- cbind(meetup_user$lon, meetup_user$lat)
sp_points <-  SpatialPoints(xy)
proj4string(sp_points) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"

## Transform
utmStr <- "+proj=utm +zone=%d +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
crs <- CRS(sprintf(utmStr, 32))
italy_map_2014 <- spTransform(italy_map_2014, crs)
sp_points <- spTransform(sp_points, crs)

res <- over(sp_points, italy_map_2014)
meetup_user$provincia <- res$PROVINCIA

# Out-at-sea
meetup_user_out_at_sea <- subset(meetup_user, is.na(provincia))
xy <- cbind(meetup_user_out_at_sea$lon, meetup_user_out_at_sea$lat)
sp_points <-  SpatialPoints(xy)
proj4string(sp_points) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
sp_points <- spTransform(sp_points, crs)

# ### Set up container for results
n <- length(sp_points)
nearestProvincia <- character(n)
for (i in seq_along(nearestProvincia)) {
  print(n - i)
  nearestProvincia[i] <-
    italy_map_2014$PROVINCIA[which.min(gDistance(sp_points[i,], italy_map_2014, byid=TRUE))]
}
meetup_user_out_at_sea$provincia <- nearestProvincia

meetup_user <- rbind(subset(meetup_user, !is.na(provincia)),
                     meetup_user_out_at_sea)
users_by_provincia <- as.data.frame(table(meetup_user$provincia))

voti_m5s_by_provincia <-
  merge(voti_m5s_by_provincia, users_by_provincia, all.x = TRUE,
        by.x = 'PROVINCIA', by.y = 'Var1')

names(voti_m5s_by_provincia)[3:6] <- c('votes', 'electors', 'voters', 'members') 

election2014_m5s_provincia <- voti_m5s_by_provincia

election2014_m5s_provincia$votes_ratio <- 
  with(election2014_m5s_provincia, votes / voters)
election2014_m5s_provincia$members_ratio <- 
  with(election2014_m5s_provincia, members / voters)

italy_map_2014 <-
  merge(italy_map_2014, 
        election2014_m5s_provincia[,c('COD_PRO','votes','electors','voters','members')],
        by = 'COD_PRO')

## MODEL 
mod2014 <- lm(log(votes) ~ log(voters) + log(members), 
                data = election2014_m5s_provincia)
mod2014_b <- lm(votes_ratio ~ 
                  log(voters) + members_ratio, 
                data = election2014_m5s_provincia)
scatterplot(election2014_m5s_provincia$votes_ratio, 
            election2014_m5s_provincia$members_ratio)

save(mod2010, mod2013, mod2014, 
     election2010_m5s_provincia, 
     election2013_m5s_provincia, 
     election2014_m5s_provincia, file = '02_03_m5s_meetup_vote_lm_mod.RData')

save(italy_map_2010, italy_map_2013, italy_map_2014, 
     file = '02_03_m5s_meetup_vote_spatial_data.RData')

